# Alex Gazmararian
# agazmararian@gmail.com

library(tidyverse)
library(tidylog)
library(sf)
library(terra)
library(lwgeom)
library(exactextractr)
library(here)
library(glue)

# Replication mode detection ----
# In anonymized mode, use cached population density results
source(here("R", "visibility", "replication_mode.R"))
REPLICATION_MODE <- init_replication_mode()

# Use centralized cache path (in data/cache/, not data/inter/)
cache_file <- get_popdensity_cache_path()

if (REPLICATION_MODE == "anonymized") {
  message("=== ANONYMIZED MODE ===")
  if (file.exists(cache_file)) {
    message("Using cached population density results (coordinates not available)")
    message("Cache file: ", cache_file)
  } else {
    stop("Anonymized mode requires cached population density. File not found: ", cache_file)
  }
} else {

df <- readRDS(here("data", "inter", "survey_visibility_processed_with_geo.rds"))

pop <- terra::rast(here("data", "input", "worldpop", "ppp_2020_1km_Aggregated.tif"))

# Transform to projected CRS for accurate distance calculations
# Using North America Albers Equal Area Conic projection
crs.proj <- "ESRI:102003"
df_proj <- st_transform(df, crs = crs.proj)

# Create 25km buffer using projected coordinates
buffer.int <- st_buffer(df_proj, dist = 25 * 1000) # 25 kilometers around a zip code

# Transform back to match population raster CRS for extraction
df <- st_transform(df, crs = 4326)
buffer.int <- st_transform(buffer.int, crs = 4326)

if (crs(df) != crs(pop)) {
  df <- st_transform(df, crs = crs(pop))
}

buffer.pop <- exact_extract(pop, buffer.int, "sum")
buffer.int$pop <- buffer.pop
buffer.int$area <- st_area(buffer.int)

df$popd <- as.numeric(with(buffer.int, pop / area) * 1000^2) # people per square kilometer

df$popd_abovemed <- ifelse(df$popd > median(df$popd), 1, 0)
df$popd_100 <- ifelse(df$popd < 100, 1, 0)
df$popd_75 <- ifelse(df$popd < 75, 1, 0)
df$popd_50 <- ifelse(df$popd < 50, 1, 0)
df$popd_25 <- ifelse(df$popd < 25, 1, 0)

df.out <- subset(df, select = c(response_id, popd, popd_abovemed, popd_100:popd_25))

df.out <- st_drop_geometry(df.out)

# Save to cache directory (persists across runs, used by anonymized mode)
write_csv(df.out, get_popdensity_cache_path())
message(glue("[OK] Survey data with population density saved to: {get_popdensity_cache_path()}"))

} # End of REPLICATION_MODE == "full" block